{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Ejercicio Regresión Logística\n", "\n", "Crear un clasificador basado en el algoritmo de regresión logistica para predecir si el valor de la vivienda supera la media\n", "\n", "entrada: housing.csv\n", "\n", "Procedimiento:\n", "- Cargar los datos los datos a un DataFrame y explorar brevemente\n", "- Eliminar las observaciones que tengan algun dato faltante\n", "- Eliminar las observaciones con el valor atípico (max) para la variable 'median_house_value'\n", "- Aplicar one hot encoding a la variable 'ocean_proximity'\n", "- Crear una nueva variable boolean 'above_median'\n", "- Aplicar los pasos train-test-split para poder entrenar y evaluar el clasificador\n", "\n", "Cuáles los valores para accuracy, matriz de confusion, precision, recall y f1 del clasificador?" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import (\n", "    accuracy_score,\n", "    confusion_matrix,\n", "    f1_score,\n", "    precision_score,\n", "    recall_score,\n", ")\n", "from sklearn.datasets import load_boston" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomeexpected_house_valueocean_proximity
0-122.2337.8841.0880.0129.0322.0126.08.3252452600.0NEAR BAY
1-122.2237.8621.07099.01106.02401.01138.08.3014358500.0NEAR BAY
2-122.2437.8552.01467.0190.0496.0177.07.2574352100.0NEAR BAY
3-122.2537.8552.01274.0235.0558.0219.05.6431341300.0NEAR BAY
4-122.2537.8552.01627.0280.0565.0259.03.8462342200.0NEAR BAY
\n", "
" ], "text/plain": [ " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", "0 -122.23 37.88 41.0 880.0 129.0 \n", "1 -122.22 37.86 21.0 7099.0 1106.0 \n", "2 -122.24 37.85 52.0 1467.0 190.0 \n", "3 -122.25 37.85 52.0 1274.0 235.0 \n", "4 -122.25 37.85 52.0 1627.0 280.0 \n", "\n", " population households median_income expected_house_value ocean_proximity \n", "0 322.0 126.0 8.3252 452600.0 NEAR BAY \n", "1 2401.0 1138.0 8.3014 358500.0 NEAR BAY \n", "2 496.0 177.0 7.2574 352100.0 NEAR BAY \n", "3 558.0 219.0 5.6431 341300.0 NEAR BAY \n", "4 565.0 259.0 3.8462 342200.0 NEAR BAY " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the raw data into a DataFrame and take a quick look at the first rows\n", "housing_csv_df = pd.read_csv(\"housing.csv\")\n", "housing_csv_df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Drop every observation that has at least one missing value\n", "housing_sin_valores_na = housing_csv_df.dropna(how='any')\n", "# Sanity check: the cleaned frame must not contain any nulls\n", "assert not housing_sin_valores_na.isnull().to_numpy().any()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYkAAAD5CAYAAADSiMnIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAATa0lEQVR4nO3dfbBd1X3e8e9jBIbYvCM8VCIWjdUWjG0M15iWvsSmIwTORLSGGpoGjYeJWkrStE4nlt02+KWewDhTEhpMrBSCaNxg6phCDbZQwY6djm242JjXYCmEGI0Yc4kwpkP8Avn1j7NEDpez7j0ScK5evp+ZM2fv3157r7U1o/PcvfY+96aqkCRplFct9AAkSbsuQ0KS1GVISJK6DAlJUpchIUnqWrTQA3i5HXHEEbVs2bKFHoYk7VbuuuuuJ6pq8ez6HhcSy5YtY3p6eqGHIUm7lSR/PqrudJMkqcuQkCR1GRKSpC5DQpLUZUhIkrrGCokkjyS5N8ndSaZb7bAkG5Nsau+HtnqSXJ5kc5J7kpw4dJzVrf2mJKuH6ie1429u+2auPqTdSZIXvaTdxY5cSbyjqk6oqqm2vha4raqWA7e1dYAzgOXttQa4EgYf+MDFwNuBk4GLhz70r2xtt++3cp4+pN1CLxAMCu0uXsp00ypgfVteD5w1VL+2Br4GHJLkKOB0YGNVbauqJ4GNwMq27aCq+moNfm/5tbOONaoPSdIEjBsSBdya5K4ka1rtdVX1GEB7P7LVlwCPDu27pdXmqm8ZUZ+rjxdIsibJdJLpmZmZMU9JkjSfcb9xfWpVbU1yJLAxyZ/M0XbUdXTtRH1sVbUOWAcwNTXlX1GSpJfJWFcSVbW1vT8O3MDgnsJ321QR7f3x1nwLcPTQ7kuBrfPUl46oM0cfkqQJmDckkrwmyYHbl4EVwH3ATcD2J5RWAze25ZuA89tTTqcAT7Wpog3AiiSHthvWK4ANbdvTSU5pTzWdP+tYo/qQJE3AONNNrwNuaE9jLAL+R1V9IcmdwPVJLgC+A5zT2t8CnAlsBp4B3gtQVduSfBS4s7X7SFVta8sXAtcABwCfby+ASzp9SJImIIMHivYcU1NT5W+B1a5irkdd97T/e9q9Jblr6CsOz/Mb15KkLkNCktS1x/3RIWlSXuq3psfd32kpLSRDQtpJ43x4e09Cuzunm6RXUC8IDAjtLgwJ6RVWVVQVr3//555flnYXhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1jR0SSfZJ8s0kn2vrxyT5epJNST6dZL9Wf3Vb39y2Lxs6xgda/aEkpw/VV7ba5iRrh+oj+5AkTcaOXEn8MvDg0PqlwGVVtRx4Erig1S8AnqyqNwCXtXYkOQ44F3gjsBL4RAuefYArgDOA44DzWtu5+pAkTcBYIZFkKfAu4L+19QDvBD7TmqwHzmrLq9o6bftprf0q4Lqq+mFV/RmwGTi5vTZX1cNV9SPgOmDVPH1IkiZg3CuJ3wR+Ffirtn448L2qeratbwGWtOUlwKMAbftTrf3z9Vn79Opz9fECSdYkmU4yPTMzM+YpSZLmM29IJPkZ4PGqumu4PKJpzbPt5aq/uFi1rqqmqmpq8eLFo5pIknbCojHanAr8bJIzgf2BgxhcWRySZFH7SX8psLW13wIcDWxJsgg4GNg2VN9ueJ9R9Sfm6EOSNAHzXklU1QeqamlVLWNw4/n2qvo54IvA2a3ZauDGtnxTW6dtv72qqtXPbU8/HQMsB+4A7gSWtyeZ9mt93NT26fUhSZqAl/I9ifcD70uymcH9g6ta/Srg8FZ/H7AWoKruB64HHgC+AFxUVc+1q4RfBDYweHrq+tZ2rj4kSRM
wznTT86rqS8CX2vLDDJ5Mmt3mB8A5nf0/BnxsRP0W4JYR9ZF9SJImw29cS5K6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktQ1b0gk2T/JHUm+leT+JB9u9WOSfD3JpiSfTrJfq7+6rW9u25cNHesDrf5QktOH6itbbXOStUP1kX1IkiZjnCuJHwLvrKq3ACcAK5OcAlwKXFZVy4EngQta+wuAJ6vqDcBlrR1JjgPOBd4IrAQ+kWSfJPsAVwBnAMcB57W2zNGHJGkC5g2JGvh/bXXf9irgncBnWn09cFZbXtXWadtPS5JWv66qflhVfwZsBk5ur81V9XBV/Qi4DljV9un1IUmagLHuSbSf+O8GHgc2An8KfK+qnm1NtgBL2vIS4FGAtv0p4PDh+qx9evXD5+hj9vjWJJlOMj0zMzPOKUmSxjBWSFTVc1V1ArCUwU/+x45q1t7T2fZy1UeNb11VTVXV1OLFi0c1kSTthB16uqmqvgd8CTgFOCTJorZpKbC1LW8BjgZo2w8Gtg3XZ+3Tqz8xRx+SpAkY5+mmxUkOacsHAP8YeBD4InB2a7YauLEt39TWadtvr6pq9XPb00/HAMuBO4A7geXtSab9GNzcvqnt0+tDkjQBi+ZvwlHA+vYU0quA66vqc0keAK5L8p+BbwJXtfZXAf89yWYGVxDnAlTV/UmuBx4AngUuqqrnAJL8IrAB2Ae4uqrub8d6f6cPSdIEzBsSVXUP8NYR9YcZ3J+YXf8BcE7nWB8DPjaifgtwy7h9SJImw29cS5K6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktQ1zjeupT3eWz58K0/95Y9f8X6Wrb35FT3+wQfsy7cuXvGK9qG9iyEhAU/95Y955JJ3LfQwXrJXOoS093G6SZLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEld84ZEkqOTfDHJg0nuT/LLrX5Yko1JNrX3Q1s9SS5PsjnJPUlOHDrW6tZ+U5LVQ/WTktzb9rk8SebqQ5I0GeNcSTwL/EpVHQucAlyU5DhgLXBbVS0HbmvrAGcAy9trDXAlDD7wgYuBtwMnAxcPfehf2dpu329lq/f6kCRNwLwhUVWPVdU32vLTwIPAEmAVsL41Ww+c1ZZXAdfWwNeAQ5IcBZwObKyqbVX1JLARWNm2HVRVX62qAq6ddaxRfUiSJmCH7kkkWQa8Ffg68LqqegwGQQIc2ZotAR4d2m1Lq81V3zKizhx9SJImYOyQSPJa4A+Bf1tV35+r6Yha7UR9bEnWJJlOMj0zM7Mju0qS5jBWSCTZl0FAfKqqPtvK321TRbT3x1t9C3D00O5Lga3z1JeOqM/VxwtU1bqqmqqqqcWLF49zSpKkMYzzdFOAq4AHq+q/DG26Cdj+hNJq4Mah+vntKadTgKfaVNEGYEWSQ9sN6xXAhrbt6SSntL7On3WsUX1IkiZg0RhtTgV+Hrg3yd2t9kHgEuD6JBcA3wHOadtuAc4ENgPPAO8FqKptST4K3NnafaSqtrXlC4FrgAOAz7cXc/QhSZqAeUOiqv6Y0fcNAE4b0b6AizrHuhq4ekR9Gjh+RP0vRvUhSZoMv3EtSeoyJCRJXYaEJKlrnBvX0h7vwGPX8qb1u/9vfTnwWIB3LfQwtAcxJCTg6Qcv4ZFLdv8P12Vrb17oIWgP43STJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkN
CktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlS16KFHoC0q1i29uaFHsJLdvAB+y70ELSHMSQk4JFL3vWK97Fs7c0T6Ud6OTndJEnqMiQkSV2GhCSpa96QSHJ1kseT3DdUOyzJxiSb2vuhrZ4klyfZnOSeJCcO7bO6td+UZPVQ/aQk97Z9Lk+SufqQJE3OOFcS1wArZ9XWArdV1XLgtrYOcAawvL3WAFfC4AMfuBh4O3AycPHQh/6Vre32/VbO04ckaULmDYmq+jKwbVZ5FbC+La8HzhqqX1sDXwMOSXIUcDqwsaq2VdWTwEZgZdt2UFV9taoKuHbWsUb1IUmakJ29J/G6qnoMoL0f2epLgEeH2m1ptbnqW0bU5+rjRZKsSTKdZHpmZmYnT0mSNNvLfeM6I2q1E/UdUlXrqmqqqqYWL168o7tLkjp2NiS+26aKaO+Pt/oW4OihdkuBrfPUl46oz9WHJGlCdjYkbgK2P6G0GrhxqH5+e8rpFOCpNlW0AViR5NB2w3oFsKFtezrJKe2ppvNnHWtUH5KkCZn313Ik+QPgp4Ejkmxh8JTSJcD1SS4AvgOc05rfApwJbAaeAd4LUFXbknwUuLO1+0hVbb8ZfiGDJ6gOAD7fXszRhyRpQuYNiao6r7PptBFtC7ioc5yrgatH1KeB40fU/2JUH5KkyfEb15KkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6lq00AOQdldJdnyfS3e8n6ra8Z2kl4khIe0kP7y1N3C6SZLUtcuHRJKVSR5KsjnJ2oUejyTtTXbpkEiyD3AFcAZwHHBekuMWdlSStPfYpUMCOBnYXFUPV9WPgOuAVQs8Jknaa+zqIbEEeHRofUurvUCSNUmmk0zPzMxMbHCStKfb1UNi1DOGL3qkpKrWVdVUVU0tXrx4AsOSpL3Drh4SW4Cjh9aXAlsXaCyStNfZ1UPiTmB5kmOS7AecC9y0wGOSpL1GdvUvBCU5E/hNYB/g6qr62DztZ4A/n8TYpB10BPDEQg9C6nh9Vb1ovn6XDwlpT5FkuqqmFnoc0o7Y1aebJEkLyJCQJHUZEtLkrFvoAUg7ynsSkqQuryQkSV2GhCSpy5CQJHUZEtqrJVmW5J/vxH7XJDl7ju2PJDnipY1uYcx3btq7GBLa2y0DdjgkpL2FIaFdSpJ/keSOJHcn+WSS1yfZlOSIJK9K8pUkK9oVwJ8kWZ/kniSfSfIT7RgnJfmjJHcl2ZDkqFZ/Q5L/k+RbSb6R5KeAS4B/0Pr7d0n2SfLxJHe24/7Ltm+S/HaSB5LcDBw5xun8Uuvn3iR/px3nsCT/qx37a0ne3OofSvLvh/4d7mvn+JokN7cx35fkPXOd44h/z2OT3DG0vizJPW3519p53pdkXZIX/dbl4SuiJFNJvtSWX5Pk6rb/N5P4d172UIaEdhlJjgXeA5xaVScAzwH/CLgU+B3gV4AHqurWtsvfBtZV1ZuB7wP/Osm+wH8Fzq6qk4Crge2/7+tTwBVV9Rbg7wGPAWuBr1TVCVV1GXAB8FRVvQ14G/ALSY4B/knr703AL7T95/NEVZ0IXAlsD4APA99sY/4gcO08x1gJbK2qt1TV8cAX5jnHF6iqB4H9kvzNVnoPcH1b/u2qels77gHAz4xxTtv9B+D29u/0DuDjSV6zA/trN7FooQcgDTkNOAm4s/1QewDweFV9KMk5wL8CThhq/2hV/d+2/PvAvwG+ABwPbGzH2Ad4LMmBwJKqugGgqn4AMOKH5xXAm4fm5A8GlgP/EPiDqnoO2Jrk9jHO57Pt/S7gn7blvw+8u43h9iSHJzl4jmPcC/xGkkuBz1X
VV5IcP+oc5zjG9cA/Y3DV9J72AnhHkl8FfgI4DLgf+N9jnBcM/p1+dujqZ3/gJ4EHx9xfuwlDQruSAOur6gMvKA6mkZa21dcCT7fl2d8ErXaM+6vq7846xkE7MIZfqqoNs/Y/c0R/8/lhe3+Ov/6/1vtDWs/ywiv7/QGq6ttJTgLOBH49ya3ADYw4xzl8GvifST47OGRtSrI/8AlgqqoeTfKh7X3OMjyu4e0B3l1VD405Bu2mnG7SruQ24OwkR8Lz8/evZzDd9Cng14DfHWr/k0m2f1CeB/wx8BCweHs9yb5J3lhV3we2JDmr1V/dwudp4MChY24ALmxTOiT5W20a5cvAue2exVEMplh2xpeBn2vH/mkGU1LfBx4BTmz1E4Fj2vLfAJ6pqt8HfqO1GXmOvQ6r6k8ZBNV/YhAY8Ncf+E8keS3Qe5rpEQZXd9CugJoNDO65pI3hrfOfunZHXklol1FVDyT5j8CtSV4F/Bh4H4N7A6dW1XNJ3p3kvcAXGUxtrE7ySWATcGVV/ahNFV3epnEWMfh7JPcDPw98MslH2rHPAe4Bnk3yLeAa4LcYPPH0jfYBOAOcxeCn93cymP75NvBHO3maHwJ+r908fgZY3ep/CJyf5G4Gf2zr263+Jgbz/X/VxnzhPOfY82ng47TwqarvJfnddj6PtD5H+TBwVZIPAl8fqn+09XlP+3d6hB27p6HdhL+7SbulJMsYzNEfv8BDkfZoTjdJkrq8kpBegiQ30KZwhrx/9o3vCY3lCuDUWeXfqqrfm/RYtOcwJCRJXU43SZK6DAlJUpchIUnqMiQkSV3/H2CP6LtepcZrAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Eliminar las observaciones con el valor atípico (max) para la variable 'median_house_value'\n", "housing_sin_valores_na['expected_house_value'].plot.box()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD7CAYAAACfQGjDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAATtElEQVR4nO3dfbBd1X3e8e9jBIbYvHPxUAQWjdUWjG0M15iWvjimIwuciWgNNTgNGg8TWkpct04nyHYb/FKmUGdKTIOJSSGIxgVTxxTVgIUKJnY6xiBsDAgFSyGK0YgxIsKYDvEL5Nc/zhI5XM6690jAvXr5fmbunL1/e+291taMznP32vvck6pCkqRRXjPXA5Ak7bgMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdY0VEkk2JHkwyf1JVrfaQUlWJVnXXg9s9SS5PMn6JA8kOX7oOEtb+3VJlg7VT2jHX9/2zXR9SJJmR8b5nESSDcBkVT05VPvPwJaquiTJMuDAqrowyWnAh4DTgHcCn62qdyY5CFgNTAIF3AecUFVPJbkH+DBwN3ArcHlV3dbrY7qxHnLIIbVgwYJt/GeQpN3bfffd92RVTUytz3sZx1wCvKstLwfuAi5s9etqkD53JzkgyWGt7aqq2gKQZBWwOMldwH5V9c1Wvw44Hbhtmj66FixYwOrVq1/GaUnS7ifJn4+qj3tPooDbk9yX5LxWe0NVPQ7QXg9t9cOBx4b23dhq09U3jqhP14ckaRaMeyVxclVtSnIosCrJn0zTNiNqtR31sbXgOg/gyCOP3JZdJUnTGOtKoqo2tdcngJuAE4EftGkk2usTrflG4Iih3ecDm2aozx9RZ5o+po7vqqqarKrJiYmXTKlJkrbTjCGR5HVJ9t26DCwCHgJWAFufUFoK3NyWVwDntKecTgKeblNFK4FFSQ5sTyktAla2bc8kOak91XTOlGON6kOSNAvGmW56A3BTeyp1HvA/quqrSe4FbkxyLvB94MzW/lYGTzatB54FPghQVVuSfBq4t7X71Nab2MD5wLXAPgxuWN/W6pd0+pAkzYKxHoHdmUxOTpZPN2lHcv3113PxxRezdu1ajj76aD7+8Y9z9tlnz/WwpBdJcl9VTU6tv5xHYCXN4Prrr+cDH/jAC+tr1qx5Yd2g0M7AKwnpVdSmaUfa1f7vaefWu5LwbzdJkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSuvz6Umk7Tfetc6/k/n6DneaSISFtp3HevP36Uu3snG6SJHUZEtKrqHe14FWEdhaGhPQqqyqqijde+JUXlqWdhSEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV1jh0SSPZJ8J8lX2vpRSb6VZF2SLybZq9Vf29bXt+0Lho7x0VZ/JMl7huqLW219kmVD9ZF9SJJm
x7ZcSXwYWDu0filwWVUtBJ4Czm31c4GnqupNwGWtHUmOAc4C3gwsBj7XgmcP4ArgVOAY4OzWdro+JEmzYKyQSDIfeC/w39p6gHcDX2pNlgOnt+UlbZ22/ZTWfglwQ1X9pKr+DFgPnNh+1lfVo1X1U+AGYMkMfUiSZsG4VxK/DfwG8Fdt/WDgh1X1XFvfCBzelg8HHgNo259u7V+oT9mnV5+ujxdJcl6S1UlWb968ecxTkiTNZMaQSPKLwBNVdd9weUTTmmHbK1V/abHqqqqarKrJiYmJUU0kSdthnK8vPRn4pSSnAXsD+zG4sjggybz2m/58YFNrvxE4AtiYZB6wP7BlqL7V8D6j6k9O04ckaRbMeCVRVR+tqvlVtYDBjec7q+qXga8BZ7RmS4Gb2/KKtk7bfmcNvmVlBXBWe/rpKGAhcA9wL7CwPcm0V+tjRdun14ckaRa8nM9JXAh8JMl6BvcPrm71q4GDW/0jwDKAqloD3Ag8DHwVuKCqnm9XCb8GrGTw9NSNre10fUiSZsE4000vqKq7gLva8qMMnkya2ubHwJmd/S8GLh5RvxW4dUR9ZB+SpNnhJ64lSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldM4ZEkr2T3JPku0nWJPlkqx+V5FtJ1iX5YpK9Wv21bX19275g6FgfbfVHkrxnqL641dYnWTZUH9mHJGl2jHMl8RPg3VX1NuA4YHGSk4BLgcuqaiHwFHBua38u8FRVvQm4rLUjyTHAWcCbgcXA55LskWQP4ArgVOAY4OzWlmn6kCTNghlDogb+X1vds/0U8G7gS62+HDi9LS9p67TtpyRJq99QVT+pqj8D1gMntp/1VfVoVf0UuAFY0vbp9SFJmgVj3ZNov/HfDzwBrAL+FPhhVT3XmmwEDm/LhwOPAbTtTwMHD9en7NOrHzxNH1PHd16S1UlWb968eZxTkiSNYayQqKrnq+o4YD6D3/yPHtWsvaaz7ZWqjxrfVVU1WVWTExMTo5pIkrbDNj3dVFU/BO4CTgIOSDKvbZoPbGrLG4EjANr2/YEtw/Up+/TqT07ThyRpFozzdNNEkgPa8j7APwbWAl8DzmjNlgI3t+UVbZ22/c6qqlY/qz39dBSwELgHuBdY2J5k2ovBze0VbZ9eH5KkWTBv5iYcBixvTyG9Brixqr6S5GHghiT/EfgOcHVrfzXw35OsZ3AFcRZAVa1JciPwMPAccEFVPQ+Q5NeAlcAewDVVtaYd68JOH5KkWTBjSFTVA8DbR9QfZXB/Ymr9x8CZnWNdDFw8on4rcOu4fUiSZoefuJYkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1jfP1pdIu722fvJ2n//Jnr3o/C5bd8qoef/999uS7Fy16VfvQ7sWQkICn//JnbLjkvXM9jJft1Q4h7X6cbpIkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6poxJJIckeRrSdYmWZPkw61+UJJVSda11wNbPUkuT7I+yQNJjh861tLWfl2SpUP1E5I82Pa5PEmm60OSNDvGuZJ4Dvj1qjoaOAm4IMkxwDLgjqpaCNzR1gFOBRa2n/OAK2Hwhg9cBLwTOBG4aOhN/8rWdut+i1u914ckaRbMGBJV9XhVfbstPwOsBQ4HlgDLW7PlwOlteQlwXQ3cDRyQ5DDgPcCqqtpSVU8Bq4DFbdt+VfXNqirguinHGtWHJGkWbNM9
iSQLgLcD3wLeUFWPwyBIgENbs8OBx4Z229hq09U3jqgzTR+SpFkwdkgkeT3wh8C/qaofTdd0RK22oz62JOclWZ1k9ebNm7dlV0nSNMYKiSR7MgiIL1TVl1v5B22qiPb6RKtvBI4Y2n0+sGmG+vwR9en6eJGquqqqJqtqcmJiYpxTkiSNYZynmwJcDaytqv8ytGkFsPUJpaXAzUP1c9pTTicBT7epopXAoiQHthvWi4CVbdszSU5qfZ0z5Vij+pAkzYJ5Y7Q5GfgV4MEk97fax4BLgBuTnAt8HzizbbsVOA1YDzwLfBCgqrYk+TRwb2v3qara0pbPB64F9gFuaz9M04ckaRbMGBJV9ceMvm8AcMqI9gVc0DnWNcA1I+qrgWNH1P9iVB+SpNnhJ64lSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSusb5nIS0y9v36GW8ZfnO/0eG9z0a4L1zPQztQgwJCXhm7SVsuGTnf3NdsOyWuR6CdjFON0mSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHXNm+sBSDuKBctumeshvGz777PnXA9BuxhDQgI2XPLeV72PBctumZV+pFeS002SpK4ZQyLJNUmeSPLQUO2gJKuSrGuvB7Z6klyeZH2SB5IcP7TP0tZ+XZKlQ/UTkjzY9rk8SabrQ5I0e8a5krgWWDyltgy4o6oWAne0dYBTgYXt5zzgShi84QMXAe8ETgQuGnrTv7K13brf4hn6kCTNkhlDoqq+DmyZUl4CLG/Ly4HTh+rX1cDdwAFJDgPeA6yqqi1V9RSwCljctu1XVd+sqgKum3KsUX1IkmbJ9t6TeENVPQ7QXg9t9cOBx4babWy16eobR9Sn60OSNEte6RvXGVGr7ahvW6fJeUlWJ1m9efPmbd1dktSxvSHxgzZVRHt9otU3AkcMtZsPbJqhPn9Efbo+XqKqrqqqyaqanJiY2M5TkiRNtb0hsQLY+oTSUuDmofo57Smnk4Cn21TRSmBRkgPbDetFwMq27ZkkJ7Wnms6ZcqxRfUiSZsmMH6ZLcj3wLuCQJBsZPKV0CXBjknOB7wNntua3AqcB64FngQ8CVNWWJJ8G7m3tPlVVW2+Gn8/gCap9gNvaD9P0IUmaJTOGRFWd3dl0yoi2BVzQOc41wDUj6quBY0fU/2JUH5Kk2eMnriVJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUte8uR6AtLNKsu37XLrt/VTVtu8kvUIMCWk7+eat3YHTTZKkrh0+JJIsTvJIkvVJls31eCRpd7JDh0SSPYArgFOBY4Czkxwzt6OSpN3HDh0SwInA+qp6tKp+CtwALJnjMUnSbmNHD4nDgceG1je22oskOS/J6iSrN2/ePGuDk6Rd3Y4eEqOeMXzJIyVVdVVVTVbV5MTExCwMS5J2Dzt6SGwEjhhanw9smqOxSNJuZ0cPiXuBhUmOSrIXcBawYo7HJEm7jezoHwhKchrw28AewDVVdfEM7TcDfz4bY5O20SHAk3M9CKnjjVX1kvn6HT4kpF1FktVVNTnX45C2xY4+3SRJmkOGhCSpy5CQZs9Vcz0AaVt5T0KS1OWVhCSpy5CQJHUZEtqtJVmQ5APbsd+1Sc6YZvuGJIe8vNHNjZnOTbsXQ0K7uwXANoeEtLswJLRDSfLPk9yT5P4kn0/yxiTrkhyS5DVJvpFkUbsC+JMky5M8kORLSX6uHeOEJH+U5L4kK5Mc1upvSvJ/knw3ybeT/DxwCfAP
Wn//NskeST6T5N523H/R9k2S30nycJJbgEPHOJ0PtX4eTPJ32nEOSvK/2rHvTvLWVv9Ekn839O/wUDvH1yW5pY35oSTvn+4cR/x7Hp3knqH1BUkeaMu/2c7zoSRXZcSXdg9fESWZTHJXW35dkmva/t9J4p/w30UZEtphJDkaeD9wclUdBzwP/CPgUuB3gV8HHq6q29sufxu4qqreCvwI+FdJ9gT+K3BGVZ0AXANs/VMuXwCuqKq3AX8PeBxYBnyjqo6rqsuAc4Gnq+odwDuAX01yFPBPWn9vAX617T+TJ6vqeOBKYGsAfBL4Thvzx4DrZjjGYmBTVb2tqo4FvjrDOb5IVa0F9kryN1vp/cCNbfl3quod7bj7AL84xjlt9XHgzvbv9AvAZ5K8bhv2105i3lwPQBpyCnACcG/7pXYf4Imq+kSSM4F/CRw31P6xqvq/bfkPgH8NfBU4FljVjrEH8HiSfYHDq+omgKr6McCIX54XAW8dmpPfH1gI/EPg+qp6HtiU5M4xzufL7fU+4J+25b8PvK+N4c4kByfZf5pjPAj8VpJLga9U1TeSHDvqHKc5xo3AP2Nw1fT+9gPwC0l+A/g54CBgDfC/xzgvGPw7/dLQ1c/ewJHA2jH3107CkNCOJMDyqvroi4qDaaT5bfX1wDNteeqHfKodY01V/d0px9hvG8bwoapaOWX/00b0N5OftNfn+ev/a73vSHmOF1/Z7w1QVd9LcgJwGvCfktwO3MSIc5zGF4H/meTLg0PWuiR7A58DJqvqsSSf2NrnFMPjGt4e4H1V9ciYY9BOyukm7UjuAM5Icii8MH//RgbTTV8AfhP4vaH2RybZ+kZ5NvDHwCPAxNZ6kj2TvLmqfgRsTHJ6q7+2hc8zwL5Dx1wJnN+mdEjyt9o0yteBs9o9i8MYTLFsj68Dv9yO/S4GU1I/AjYAx7f68cBRbflvAM9W1R8Av9XajDzHXodV9acMguo/MAgM+Os3/CeTvB7oPc20gcHVHbQroGYlg3suaWN4+8ynrp2RVxLaYVTVw0n+PXB7ktcAPwM+wuDewMlV9XyS9yX5IPA1BlMbS5N8HlgHXFlVP21TRZe3aZx5DP7U/BrgV4DPJ/lUO/aZwAPAc0m+C1wLfJbBE0/fbm+Am4HTGfz2/m4G0z/fA/5oO0/zE8Dvt5vHzwJLW/0PgXOS3M/ge1S+1+pvYTDf/1dtzOfPcI49XwQ+Qwufqvphkt9r57Oh9TnKJ4Grk3wM+NZQ/dOtzwfav9MGtu2ehnYS/lkO7ZSSLGAwR3/sHA9F2qU53SRJ6vJKQnoZktxEm8IZcuHUG9+zNJYrgJOnlD9bVb8/22PRrsOQkCR1Od0kSeoyJCRJXYaEJKnLkJAkdRkSkqSu/w/ooRM2C0TcgQAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Drop the observations above the upper fence Q3 + 1.5 * IQR of 'expected_house_value'\n", "precios = housing_sin_valores_na['expected_house_value']\n", "q1, q3 = precios.quantile(.25), precios.quantile(.75)\n", "iqr = q3 - q1\n", "es_atipico_max = precios > q3 + 1.5*iqr\n", "indices_val_atipicos_max = housing_sin_valores_na.index[es_atipico_max.to_numpy()]\n", "housing_sin_val_atip = housing_sin_valores_na.drop(index=indices_val_atipicos_max)\n", "housing_sin_val_atip['expected_house_value'].plot.box()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomeexpected_house_valueocean_proximity_<1H OCEANocean_proximity_INLANDocean_proximity_ISLANDocean_proximity_NEAR BAYocean_proximity_NEAR OCEAN
0-122.2337.8841.0880.0129.0322.0126.08.3252452600.000010
1-122.2237.8621.07099.01106.02401.01138.08.3014358500.000010
2-122.2437.8552.01467.0190.0496.0177.07.2574352100.000010
3-122.2537.8552.01274.0235.0558.0219.05.6431341300.000010
4-122.2537.8552.01627.0280.0565.0259.03.8462342200.000010
.............................................
20635-121.0939.4825.01665.0374.0845.0330.01.560378100.001000
20636-121.2139.4918.0697.0150.0356.0114.02.556877100.001000
20637-121.2239.4317.02254.0485.01007.0433.01.700092300.001000
20638-121.3239.4318.01860.0409.0741.0349.01.867284700.001000
20639-121.2439.3716.02785.0616.01387.0530.02.388689400.001000
\n", "

19369 rows × 14 columns

\n", "
" ], "text/plain": [ " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", "0 -122.23 37.88 41.0 880.0 129.0 \n", "1 -122.22 37.86 21.0 7099.0 1106.0 \n", "2 -122.24 37.85 52.0 1467.0 190.0 \n", "3 -122.25 37.85 52.0 1274.0 235.0 \n", "4 -122.25 37.85 52.0 1627.0 280.0 \n", "... ... ... ... ... ... \n", "20635 -121.09 39.48 25.0 1665.0 374.0 \n", "20636 -121.21 39.49 18.0 697.0 150.0 \n", "20637 -121.22 39.43 17.0 2254.0 485.0 \n", "20638 -121.32 39.43 18.0 1860.0 409.0 \n", "20639 -121.24 39.37 16.0 2785.0 616.0 \n", "\n", " population households median_income expected_house_value \\\n", "0 322.0 126.0 8.3252 452600.0 \n", "1 2401.0 1138.0 8.3014 358500.0 \n", "2 496.0 177.0 7.2574 352100.0 \n", "3 558.0 219.0 5.6431 341300.0 \n", "4 565.0 259.0 3.8462 342200.0 \n", "... ... ... ... ... \n", "20635 845.0 330.0 1.5603 78100.0 \n", "20636 356.0 114.0 2.5568 77100.0 \n", "20637 1007.0 433.0 1.7000 92300.0 \n", "20638 741.0 349.0 1.8672 84700.0 \n", "20639 1387.0 530.0 2.3886 89400.0 \n", "\n", " ocean_proximity_<1H OCEAN ocean_proximity_INLAND \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "20635 0 1 \n", "20636 0 1 \n", "20637 0 1 \n", "20638 0 1 \n", "20639 0 1 \n", "\n", " ocean_proximity_ISLAND ocean_proximity_NEAR BAY \\\n", "0 0 1 \n", "1 0 1 \n", "2 0 1 \n", "3 0 1 \n", "4 0 1 \n", "... ... ... \n", "20635 0 0 \n", "20636 0 0 \n", "20637 0 0 \n", "20638 0 0 \n", "20639 0 0 \n", "\n", " ocean_proximity_NEAR OCEAN \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "... ... 
\n", "20635 0 \n", "20636 0 \n", "20637 0 \n", "20638 0 \n", "20639 0 \n", "\n", "[19369 rows x 14 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Aplicar one hot encoding a la variable 'ocean_proximity'\n", "housing_procesado = pd.get_dummies(housing_sin_val_atip, columns=['ocean_proximity'])\n", "housing_procesado" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomeexpected_house_valueocean_proximity_<1H OCEANocean_proximity_INLANDocean_proximity_ISLANDocean_proximity_NEAR BAYocean_proximity_NEAR OCEANabove_median
0-122.2337.8841.0880.0129.0322.0126.08.3252452600.000010True
1-122.2237.8621.07099.01106.02401.01138.08.3014358500.000010True
2-122.2437.8552.01467.0190.0496.0177.07.2574352100.000010True
3-122.2537.8552.01274.0235.0558.0219.05.6431341300.000010True
4-122.2537.8552.01627.0280.0565.0259.03.8462342200.000010True
................................................
20635-121.0939.4825.01665.0374.0845.0330.01.560378100.001000False
20636-121.2139.4918.0697.0150.0356.0114.02.556877100.001000False
20637-121.2239.4317.02254.0485.01007.0433.01.700092300.001000False
20638-121.3239.4318.01860.0409.0741.0349.01.867284700.001000False
20639-121.2439.3716.02785.0616.01387.0530.02.388689400.001000False
\n", "

19369 rows × 15 columns

\n", "
" ], "text/plain": [ " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", "0 -122.23 37.88 41.0 880.0 129.0 \n", "1 -122.22 37.86 21.0 7099.0 1106.0 \n", "2 -122.24 37.85 52.0 1467.0 190.0 \n", "3 -122.25 37.85 52.0 1274.0 235.0 \n", "4 -122.25 37.85 52.0 1627.0 280.0 \n", "... ... ... ... ... ... \n", "20635 -121.09 39.48 25.0 1665.0 374.0 \n", "20636 -121.21 39.49 18.0 697.0 150.0 \n", "20637 -121.22 39.43 17.0 2254.0 485.0 \n", "20638 -121.32 39.43 18.0 1860.0 409.0 \n", "20639 -121.24 39.37 16.0 2785.0 616.0 \n", "\n", " population households median_income expected_house_value \\\n", "0 322.0 126.0 8.3252 452600.0 \n", "1 2401.0 1138.0 8.3014 358500.0 \n", "2 496.0 177.0 7.2574 352100.0 \n", "3 558.0 219.0 5.6431 341300.0 \n", "4 565.0 259.0 3.8462 342200.0 \n", "... ... ... ... ... \n", "20635 845.0 330.0 1.5603 78100.0 \n", "20636 356.0 114.0 2.5568 77100.0 \n", "20637 1007.0 433.0 1.7000 92300.0 \n", "20638 741.0 349.0 1.8672 84700.0 \n", "20639 1387.0 530.0 2.3886 89400.0 \n", "\n", " ocean_proximity_<1H OCEAN ocean_proximity_INLAND \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "20635 0 1 \n", "20636 0 1 \n", "20637 0 1 \n", "20638 0 1 \n", "20639 0 1 \n", "\n", " ocean_proximity_ISLAND ocean_proximity_NEAR BAY \\\n", "0 0 1 \n", "1 0 1 \n", "2 0 1 \n", "3 0 1 \n", "4 0 1 \n", "... ... ... \n", "20635 0 0 \n", "20636 0 0 \n", "20637 0 0 \n", "20638 0 0 \n", "20639 0 0 \n", "\n", " ocean_proximity_NEAR OCEAN above_median \n", "0 0 True \n", "1 0 True \n", "2 0 True \n", "3 0 True \n", "4 0 True \n", "... ... ... 
\n", "20635 0 False \n", "20636 0 False \n", "20637 0 False \n", "20638 0 False \n", "20639 0 False \n", "\n", "[19369 rows x 15 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create the boolean label 'above_median': True when the house value exceeds the mean value\n", "media_expected_house_value = housing_procesado['expected_house_value'].mean()\n", "housing_procesado['above_median'] = housing_procesado['expected_house_value'] > media_expected_house_value\n", "housing_procesado" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Train/test split. The features exclude both the raw price and the label derived from it\n", "# (keeping either one in X would leak the target into the model).\n", "X = housing_procesado.drop(columns=['expected_house_value', 'above_median']).to_numpy().astype('float')\n", "# The classifier must learn the boolean label, not the continuous price\n", "y = housing_procesado['above_median'].to_numpy()\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create the classifier\n", "clasificador_reg_log = LogisticRegression(random_state=0, solver='liblinear')\n", "# Train the classifier\n", "clasificador_reg_log.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Predict once and reuse the predictions for every metric\n", "y_pred = clasificador_reg_log.predict(X_test)\n", "# accuracy\n", "print('accuracy del clasificador: {0:.2f}'.format(accuracy_score(y_test, y_pred)))\n", "# confusion matrix\n", "print('matriz de confusión del clasificador: \\n {0}'.format(confusion_matrix(y_test, y_pred)))\n", "# precision\n", "print('precision del clasificador: {0:.2f}'.format(precision_score(y_test, y_pred)))\n", "# recall\n", "print('recall del clasificador: {0:.2f}'.format(recall_score(y_test, y_pred)))\n", "# f1\n", "print('f1 del clasificador: {0:.2f}'.format(f1_score(y_test, y_pred)))" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 4 }